---
title: "Investigate income options for NSW-NY-ON comparison (NSW)"
output:
  html_document:
    df_print: paged
---


```{r setup,include=FALSE, echo=FALSE}
library(irr)
library(tidyverse)
library(knitr)
library(haven)
library(openxlsx)
# Knit every chunk from the project checkout so that the relative
# "./income-investigation/..." paths read by later chunks resolve.
# NOTE(review): this is an absolute path on one user's machine; it has to be
# edited before the document can be knitted anywhere else.
opts_knit$set(root.dir = 'C:/Users/kcha0642/Documents/NSW-ON-NY/on-ny-nsw-kcha0642/on-ny-nsw')
```

# SA2 level data

## Median reported income from Census
```{r}
# Census counts of people per weekly-income bracket, one row per SA2.
nsw_sa2 <- readxl::read_xlsx("./income-investigation/sa2_incomeweekly.xlsx")

# Show which SA2s have a Total of 0 before they are dropped, so the exclusions
# are visible in the knitted report. print() takes no column argument: the
# select() above already restricts the output to the SA2 name.
nsw_sa2 %>%
  filter(Total == 0) %>%
  select(`SA2 (UR)`) %>%
  print()

# Keep only SA2s with a non-zero Total, then drop the Total column itself.
nsw_sa2 <- nsw_sa2 %>%
  filter(Total != 0) %>%
  select(-Total)

# One value per SA2, computed from columns 2:16 of each row.
# NOTE(review): median_income() is not defined in this file; it is presumably
# a helper returning the label of the bracket that contains the median person,
# and columns 2:16 are presumably the 15 bracket counts - confirm both.
median_groups <- apply(nsw_sa2[, 2:16], 1, median_income)
table(median_groups)

sa2_data <- data.frame(
  sa2_name = nsw_sa2$`SA2 (UR)`,
  income_report = median_groups
)

# Bracket labels in ascending income order; this order defines the factor
# levels and therefore the ranking used for the quintiles below. Any value of
# median_groups that is not spelled exactly like one of these becomes NA.
income_bracket_levels <- c(
  "Negative income",
  "Nil income",
  "$1-$149 ($1-$7,799)",
  "$150-$299 ($7,800-$15,599)",
  "$300-$399 ($15,600-$20,799)",
  "$400-$499 ($20,800-$25,999)",
  "$500-$649 ($26,000-$33,799)",
  "$650-$799 ($33,800-$41,599)",
  "$800-$999 ($41,600-$51,999)",
  "$1,000-$1,249 ($52,000-$64,999)",
  "$1,250-$1,499 ($65,000-$77,999)",
  "$1,500-$1,749 ($78,000-$90,999)",
  "$1,750-$1,999 ($91,000-$103,999)",
  "$2,000-$2,999 ($104,000-$155,999)",
  "$3,000 or more ($156,000 or more)"
)

sa2_data$income_report <- factor(
  sa2_data$income_report,
  levels = income_bracket_levels
)

# ntile() splits the rows into 5 equal-sized buckets by rank, so SA2s that
# share the same bracket can still end up in different quintiles.
sa2_data <- sa2_data %>%
  mutate(quintile_report = ntile(income_report, 5))

```

## ATO/ABS personal income by SA2
Note: the ATO/ABS personal income figures exclude government pensions and allowances.
```{r}
# ATO/ABS personal income sheet; joined to the Census table on the SA2 name.
sa2_personal <- readxl::read_xlsx("./income-investigation/sa2_personal income_atoabs.xlsx")

# Attach the ATO/ABS columns, then bucket SA2s into 5 groups by median_income.
# NOTE(review): median_income here is presumably a column supplied by the
# joined sheet - confirm against the spreadsheet.
sa2_data <- left_join(
  sa2_data,
  sa2_personal,
  by = c("sa2_name" = "sa2 name")
) %>%
  mutate(quintile_personal = ntile(median_income, 5))

# Agreement (irr::agree) between the Census-based and ATO/ABS-based quintiles.
agree(select(sa2_data, quintile_report, quintile_personal))
```

```{r,echo = FALSE}
# Jittered points: median_income (y) against the ATO/ABS-based quintile (x).
ggplot(sa2_data, aes(x = quintile_personal, y = median_income)) +
  geom_jitter()
```

```{r,echo = FALSE}
# Jittered points: median_income (y) against the Census-based quintile (x).
ggplot(sa2_data, aes(x = quintile_report, y = median_income)) +
  geom_jitter()
```

```{r}
# Store the `sa2` column as character before exporting.
# NOTE(review): no `sa2` column is created in this file; presumably it comes
# from the ATO/ABS sheet joined earlier - confirm. sa2_data is a base
# data.frame, so `$` falls back to partial name matching when there is no
# exact `sa2` column.
sa2_data$sa2 <- as.character(sa2_data$sa2)
# Write the combined table to income_sa2.xlsx (path relative to the working
# directory in effect when the chunk runs).
write.xlsx(sa2_data,"income_sa2.xlsx")
```